# analysis_SI_section_4.R
# This script replicates the analysis in Supporting Information Section 4 (referenced in Section 3 of the Main Text) of the paper
#   Figure SI.8, Table SI.3, Table SI.4 
# Author: Yimeng Li
# Dependencies:
#   Data/VR_VoteCal_LA.Rdata
#   Data/Block_Group_UVBM_Map_new.RData
#   Data/ACS cleaned/ACS_educ_cat_LA_clean.RData
#   Data/ACS cleaned/ACS_hh_income_cat_LA_clean.RData
#   Data/ACS cleaned/ACS_rent_cat_LA_clean.RData
#   Data/ACS cleaned/ACS_value_cat_LA_clean.RData

# Load libraries:
library(dplyr)
library(ggplot2)
library(patchwork)
library(stringr)
library(tidyr)

# Load data:
load("./Replication Package/Data/VR_VoteCal_LA.Rdata")

# Prepare data for analysis
VR_VoteCal_LA <- VR_VoteCal_LA %>%
  mutate(
    Age_18_to_29 = if_else(Age >= 18 & Age <= 29, 1, 0),
    Age_30_to_44 = if_else(Age >= 30 & Age <= 44, 1, 0),
    Age_45_to_64 = if_else(Age >= 45 & Age <= 64, 1, 0),
    Age_65_or_older = if_else(Age >= 65, 1, 0)
  )

#*******************************************************************************
# Figure SI.8
#   Distribution of Distances to the Boundary of Universal and Non-Universal Vote-by-Mail Districts
#*******************************************************************************

# Distribution for Voters living in Universal Vote-by-Mail Districts
Figure_Dist_UVBM <- ggplot(VR_VoteCal_LA %>% filter(UVBM == 1), aes(x = abs_dist/1000)) +
  geom_histogram(boundary = 0, binwidth = 2, color = "black", fill = "#1380A1") +
  scale_x_continuous(breaks = seq(0, 100, 20), limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 300000, 100000), limits = c(0, 360000), labels = scales::comma) +
  labs(title = "Universal Vote-by-Mail Districts",
       x = "Distance to the boundary (kilometers)",
       y = "Number of Reg. Voters") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Distribution for Voters living in Non-Universal Vote-by-Mail Districts
Figure_Dist_NUVBM <- ggplot(VR_VoteCal_LA %>% filter(UVBM == 0), aes(x = abs_dist/1000)) +
  geom_histogram(boundary = 0, binwidth = 2, color = "black", fill = "#1380A1") +
  scale_x_continuous(breaks = seq(0, 100, 20), limits = c(0, 100)) +
  scale_y_continuous(breaks = seq(0, 300000, 100000), limits = c(0, 360000), labels = scales::comma) +
  labs(title = "Non-Universal Vote-by-Mail Districts",
       x = "Distance to the boundary (kilometers)",
       y = "Number of Reg. Voters") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14),
        axis.text.y = element_text(size = 14))

# Full Figure
Figure_Dist_combined <- Figure_Dist_UVBM / Figure_Dist_NUVBM

# Save Figure
ggsave("./Replication Package/Figure SI.8.pdf", Figure_Dist_combined,
       width = 8, height = 8, units = "in", dpi = 600)

#*******************************************************************************
#  Table SI.3:
#   Demographic Composition of Control and TreatedAreas
#*******************************************************************************

# Table SI.3
Table_SI3 <- VR_VoteCal_LA %>%
  group_by(UVBM) %>%
  summarise(
    prop_18_to_29 = mean(Age_18_to_29, na.rm = TRUE),
    prop_30_to_44 = mean(Age_30_to_44, na.rm = TRUE),
    prop_45_to_64 = mean(Age_45_to_64, na.rm = TRUE),
    prop_65_or_older = mean(Age_65_or_older, na.rm = TRUE),
    prop_female = mean(gender_inf, na.rm = TRUE),
    prop_white = mean(pred.whi, na.rm = TRUE),
    prop_black = mean(pred.bla, na.rm = TRUE),
    prop_hispanic = mean(pred.his, na.rm = TRUE),
    prop_asian = mean(pred.asi, na.rm = TRUE),
    prop_other = mean(pred.oth, na.rm = TRUE),
    se_18_to_29 = sd(Age_18_to_29, na.rm = TRUE)/sqrt(sum(!is.na(Age_18_to_29))),
    se_30_to_44 = sd(Age_30_to_44, na.rm = TRUE)/sqrt(sum(!is.na(Age_30_to_44))),
    se_45_to_64 = sd(Age_45_to_64, na.rm = TRUE)/sqrt(sum(!is.na(Age_45_to_64))),
    se_65_or_older = sd(Age_65_or_older, na.rm = TRUE)/sqrt(sum(!is.na(Age_65_or_older))),
    se_female = sd(gender_inf, na.rm = TRUE)/sqrt(sum(!is.na(gender_inf))),
    se_white = sd(pred.whi, na.rm = TRUE)/sqrt(sum(!is.na(pred.whi))),
    se_black = sd(pred.bla, na.rm = TRUE)/sqrt(sum(!is.na(pred.bla))),
    se_hispanic = sd(pred.his, na.rm = TRUE)/sqrt(sum(!is.na(pred.his))),
    se_asian = sd(pred.asi, na.rm = TRUE)/sqrt(sum(!is.na(pred.asi))),
    se_other = sd(pred.oth, na.rm = TRUE)/sqrt(sum(!is.na(pred.oth)))
  ) %>%
  pivot_longer(
    cols = !UVBM,
    names_to = c(".value", "variable"),
    names_pattern = "(prop|se)_(.*)"
  ) %>%
  transmute(
    UVBM,
    variable,
    Perc = 100*prop,
    SE = 100*se
  ) %>% 
  pivot_wider(
    names_from = UVBM,
    values_from = c(Perc, SE)
  ) %>%
  mutate(
    Perc_diff = Perc_1 - Perc_0,
    SE_diff = sqrt(SE_1^2 + SE_0^2)
  ) %>%
  relocate(variable, Perc_0, Perc_1, Perc_diff, SE_0, SE_1, SE_diff) %>%
  mutate(across(-variable, ~round(., 1)))

# Save Table
write.csv(Table_SI3, "./Replication Package/Table SI.3.csv", row.names = FALSE, na = '')

#*******************************************************************************
# Table SI.4:
#   Socioeconomic Composition of Control and Treated Areas
#*******************************************************************************

# Import data:
load("./Replication Package/Data/Block_Group_UVBM_Map_new.RData")
load("./Replication Package/Data/ACS cleaned/ACS_educ_cat_LA_clean.RData")
load("./Replication Package/Data/ACS cleaned/ACS_hh_income_cat_LA_clean.RData")
load("./Replication Package/Data/ACS cleaned/ACS_rent_cat_LA_clean.RData")
load("./Replication Package/Data/ACS cleaned/ACS_value_cat_LA_clean.RData")

# Education
balance_educ_cat <- ACS_educ_cat_LA_clean %>%
  right_join(Block_Group_UVBM_Map_new) %>%
  mutate(across(starts_with("N_"), ~ . * prop_reg_by_UVBM)) %>%
  mutate(across(starts_with("MOE_"), ~ . * prop_reg_by_UVBM)) %>%
  group_by(UVBM) %>%
  summarise(
    across(starts_with("N_"), sum, na.rm = TRUE),
    across(starts_with("MOE_"), ~sqrt(sum(.^2, na.rm = TRUE)))
  ) %>%
  mutate(
    across(starts_with("N_"), ~ . / N_total,
           .names = "{str_replace(.col, 'N_', 'Prop_')}"),
    MOE_Prop_no_high_school =
      sqrt(MOE_no_high_school^2 - (Prop_no_high_school^2)*(MOE_total^2))/N_total,
    MOE_Prop_high_school =
      sqrt(MOE_high_school^2 - (Prop_high_school^2)*(MOE_total^2))/N_total,
    MOE_Prop_some_college =
      sqrt(MOE_some_college^2 - (Prop_some_college^2)*(MOE_total^2))/N_total,
    MOE_Prop_bachelor =
      sqrt(MOE_bachelor^2 - (Prop_bachelor^2)*(MOE_total^2))/N_total,
    MOE_Prop_postgraduate =
      sqrt(MOE_postgraduate^2 - (Prop_postgraduate^2)*(MOE_total^2))/N_total
  ) %>%
  select(-Prop_total) %>%
  select(UVBM, starts_with("Prop_"), starts_with("MOE_Prop_"))

# Household income
balance_hh_income_cat <- ACS_hh_income_cat_LA_clean %>%
  right_join(Block_Group_UVBM_Map_new) %>%
  mutate(across(starts_with("N_"), ~ . * prop_reg_by_UVBM)) %>%
  mutate(across(starts_with("MOE_"), ~ . * prop_reg_by_UVBM)) %>%
  group_by(UVBM) %>%
  summarise(
    across(starts_with("N_"), sum, na.rm = TRUE),
    across(starts_with("MOE_"), ~sqrt(sum(.^2, na.rm = TRUE)))
  ) %>%
  mutate(
    across(starts_with("N_"), ~ . / N_total,
           .names = "{str_replace(.col, 'N_', 'Prop_')}"),
    MOE_Prop_less_than_35000 =
      sqrt(MOE_less_than_35000^2 - (Prop_less_than_35000^2)*(MOE_total^2))/N_total,
    MOE_Prop_35000_to_74999 =
      sqrt(MOE_35000_to_74999^2 - (Prop_35000_to_74999^2)*(MOE_total^2))/N_total,
    MOE_Prop_75000_to_124999 =
      sqrt(MOE_75000_to_124999^2 - (Prop_75000_to_124999^2)*(MOE_total^2))/N_total,
    MOE_Prop_125000_or_more =
      sqrt(MOE_125000_or_more^2 - (Prop_125000_or_more^2)*(MOE_total^2))/N_total
  ) %>%
  select(-Prop_total) %>%
  select(UVBM, starts_with("Prop_"), starts_with("MOE_Prop_"))

# Rent
balance_rent_cat <- ACS_rent_cat_LA_clean %>%
  right_join(Block_Group_UVBM_Map_new) %>%
  mutate(across(starts_with("N_"), ~ . * prop_reg_by_UVBM)) %>%
  mutate(across(starts_with("MOE_"), ~ . * prop_reg_by_UVBM)) %>%
  group_by(UVBM) %>%
  summarise(
    across(starts_with("N_"), sum, na.rm = TRUE),
    across(starts_with("MOE_"), ~sqrt(sum(.^2, na.rm = TRUE)))
  ) %>%
  mutate(
    across(starts_with("N_"), ~ . / N_total,
           .names = "{str_replace(.col, 'N_', 'Prop_')}"),
    MOE_Prop_less_than_1000 =
      sqrt(MOE_less_than_1000^2 - (Prop_less_than_1000^2)*(MOE_total^2))/N_total,
    MOE_Prop_1000_to_1499 =
      sqrt(MOE_1000_to_1499^2 - (Prop_1000_to_1499^2)*(MOE_total^2))/N_total,
    MOE_Prop_1500_to_1999 =
      sqrt(MOE_1500_to_1999^2 - (Prop_1500_to_1999^2)*(MOE_total^2))/N_total,
    MOE_Prop_2000_or_more =
      sqrt(MOE_2000_or_more^2 - (Prop_2000_or_more^2)*(MOE_total^2))/N_total
  ) %>%
  select(-Prop_total) %>%
  select(UVBM, starts_with("Prop_"), starts_with("MOE_Prop_"))

# 
balance_value_cat <- ACS_value_cat_LA_clean %>%
  right_join(Block_Group_UVBM_Map_new) %>%
  mutate(across(starts_with("N_"), ~ . * prop_reg_by_UVBM)) %>%
  mutate(across(starts_with("MOE_"), ~ . * prop_reg_by_UVBM)) %>%
  group_by(UVBM) %>%
  summarise(
    across(starts_with("N_"), sum, na.rm = TRUE),
    across(starts_with("MOE_"), ~sqrt(sum(.^2, na.rm = TRUE)))
  ) %>%
  mutate(
    across(starts_with("N_"), ~ . / N_total,
           .names = "{str_replace(.col, 'N_', 'Prop_')}"),
    MOE_Prop_less_than_500k =
      sqrt(MOE_less_than_500k^2 - (Prop_less_than_500k^2)*(MOE_total^2))/N_total,
    MOE_Prop_500k_to_750k =
      sqrt(MOE_500k_to_750k^2 - (Prop_500k_to_750k^2)*(MOE_total^2))/N_total,
    MOE_Prop_750k_to_1m =
      sqrt(MOE_750k_to_1m^2 - (Prop_750k_to_1m^2)*(MOE_total^2))/N_total,
    MOE_Prop_1m_or_more =
      sqrt(MOE_1m_or_more^2 - (Prop_1m_or_more^2)*(MOE_total^2))/N_total
  ) %>%
  select(-Prop_total) %>%
  select(UVBM, starts_with("Prop_"), starts_with("MOE_Prop_"))

# Table SI.4
Table_SI4 <- balance_educ_cat %>%
  left_join(balance_hh_income_cat) %>%
  left_join(balance_rent_cat) %>%
  left_join(balance_value_cat) %>%
  pivot_longer(
    cols = !UVBM,
    names_to = c(".value", "variable"),
    names_pattern = "(Prop|MOE_Prop)_(.*)"
  ) %>%
  transmute(UVBM,
            variable,
            Perc = 100*Prop,
            SE = 100*MOE_Prop/1.645) %>% 
  pivot_wider(names_from = UVBM, values_from = c(Perc, SE)) %>%
  mutate(Perc_diff = Perc_1 - Perc_0,
         SE_diff = sqrt(SE_1^2 + SE_0^2)) %>%
  relocate(variable, Perc_0, Perc_1, Perc_diff, SE_0, SE_1, SE_diff) %>%
  mutate(across(-variable, ~round(., 1)))

# Save Table
write.csv(Table_SI4, "./Replication Package/Table SI.4.csv", row.names = FALSE, na = '')
